@inproceedings{MaiaVieiPedr:2021:ViRhCo,
  author              = {Maia, Helena de Almeida and Vieira, Marcelo Bernardes and Pedrini,
                         Helio},
  affiliation         = {UNICAMP and UFJF and UNICAMP},
  title               = {Visual rhythm-based convolutional neural networks and adaptive
                         fusion for a multi-stream architecture applied to human action
                         recognition},
  booktitle           = {Proceedings...},
  year                = {2021},
  editor              = {Paiva, Afonso and Menotti, David and Baranoski, Gladimir V. G. and
                         Proen{\c{c}}a, Hugo Pedro and Apolinario Junior, Antonio Lopes
                         and Papa, Jo{\~a}o Paulo and Pagliosa, Paulo and dos Santos,
                         Thiago Oliveira and e S{\'a}, Asla Medeiros and da Silveira,
                         Thiago Lopes Trugillo and Brazil, Emilio Vital and Ponti, Moacir
                         A. and Fernandes, Leandro A. F. and Avila, Sandra},
  organization        = {Conference on Graphics, Patterns and Images, 34. (SIBGRAPI)},
  publisher           = {Sociedade Brasileira de Computa{\c{c}}{\~a}o},
  address             = {Porto Alegre},
  keywords            = {action recognition, visual rhythm, multi-stream architecture},
  abstract            = {In this work, we address the problem of human action recognition
                         in videos. We propose and analyze a multi-stream architecture
                         containing image-based networks pre-trained on the large ImageNet.
                         Different image representations are extracted from the videos to
                         feed the streams, in order to provide complementary information
                         for the system. Here, we propose new streams based on visual
                         rhythm that encodes longer-term information when compared to still
                         frames and optical flow. Our main contribution is a stream based
                         on a new variant of the visual rhythm called Learnable Visual
                         Rhythm (LVR) formed by the outputs of a deep network. The features
                         are collected at multiple depths to enable the analysis of
                         different abstraction levels. This strategy significantly
                         outperforms the handcrafted version on the UCF101 and HMDB51
                         datasets. We also investigate many combinations of the streams to
                         identify the modalities that better complement each other.
                         Experiments conducted on the two datasets show that our
                         multi-stream network achieved competitive results compared to
                         state-of-the-art approaches.},
  conference-location = {Gramado, RS, Brazil (virtual)},
  conference-year     = {18--22 Oct. 2021},
  language            = {en},
  ibi                 = {8JMKD3MGPEW34M/45CU66B},
  url                 = {http://urlib.net/ibi/8JMKD3MGPEW34M/45CU66B},
  targetfile          = {camera_ready.pdf},
  urlaccessdate       = {2024, Apr. 28},
}